# -*- coding: utf-8 -*-
from openchemie import OpenChemIE
import os

def extract_figures_and_tables(
    pdf_path='example/acs.joc.2c00749.pdf',
    figure_output_path="extracted_figure_1.png",
):
    """Demonstrate extracting all figures and tables from a PDF document.

    Prints how many figures and tables were found, saves the first extracted
    figure image to disk, and prints the content of the first extracted table.

    Args:
        pdf_path: Path of the PDF to process. Defaults to the bundled example.
        figure_output_path: Where the first extracted figure image is saved.

    Returns:
        None. If ``pdf_path`` is not an existing file, an error message is
        printed and the function returns without loading the model.
    """
    # Validate the input first: constructing the model is the expensive step
    # (it downloads the required models on first run), so fail fast on a bad
    # path. isfile() also rejects directories, which exists() would accept.
    if not os.path.isfile(pdf_path):
        print(f"Error: Example PDF not found at '{pdf_path}'")
        return

    print("Initializing OpenChemIE...")
    # Initialize the model; required models are downloaded on the first run.
    model = OpenChemIE()
    print("Initialization complete.")

    print(f"\nProcessing PDF: {pdf_path}")

    # 1. Extract all figures.
    figures = model.extract_figures_from_pdf(pdf_path)
    print(f"\nFound {len(figures)} figures.")

    # Save the first extracted figure as an example.
    # NOTE(review): assumes each entry is a dict whose 'figure' value is a
    # mapping with an 'image' object exposing .save() -- confirm against the
    # OpenChemIE docs. `or {}` tolerates a 'figure' key that is present but None.
    first_figure_image = None
    if figures:
        first_figure_image = (figures[0].get('figure') or {}).get('image')
    if first_figure_image is not None:
        first_figure_image.save(figure_output_path)
        print(f"Saved the first extracted figure as {figure_output_path}")

    # 2. Extract all tables.
    tables = model.extract_tables_from_pdf(pdf_path)
    print(f"\nFound {len(tables)} tables.")

    # Print the content of the first extracted table as an example.
    first_table_content = None
    if tables:
        first_table_content = (tables[0].get('table') or {}).get('content')
    if first_table_content:
        print("\n--- Content of the first extracted table ---")
        print(first_table_content)
        print("------------------------------------------")

# Run the demo only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    extract_figures_and_tables()
